home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2009 February / PCWFEB09.iso / Software / Resources / Chat & Communication / Digsby build 37 / digsby_setup.exe / lib / util / httptools.pyo (.txt) < prev    next >
Python Compiled Bytecode  |  2008-10-13  |  11KB  |  349 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.5)
  3.  
  4. from __future__ import with_statement
  5. from callbacks import callsback
  6. from threads import threaded
  7. from threads.timeout_thread import Timer
  8. from net import build_opener, build_cookie
  9. import re
  10. import StringIO
  11. import cookielib
  12. import urllib2
  13. import logging
  14. import lxml.etree as ET
  15. import lxml.html as HTML
  16. import operator
  17. from contextlib import closing
  18. itemgetter0 = operator.itemgetter(0)
  19. log = logging.getLogger('httptools')
  20.  
  21. class RequestOpener(object):
  22.     retries = 3
  23.     pause_for_attempts = 1
  24.     js_redirect_res = ((re.compile('window.location.replace\\("(.*?)"\\);'), 1),)
  25.     
  26.     def __init__(self, opener, request, data = None, **kwds):
  27.         self.openfunc = getattr(opener, 'open', opener)
  28.         retries = kwds.pop('retries', None)
  29.         if retries is not None:
  30.             self.retries = retries
  31.         
  32.         if isinstance(request, basestring):
  33.             request = urllib2.Request.make_request(request, data, **kwds)
  34.         
  35.         self.request = request
  36.         self._sub_requester = None
  37.         self.callback = None
  38.  
  39.     
  40.     def open(self, callback = None):
  41.         if self.callback is not None:
  42.             raise Exception('Request already in progress')
  43.         
  44.         self.callback = callback
  45.         self._attempt_open()
  46.  
  47.     open = callsback(open)
  48.     
  49.     def _attempt_open(self):
  50.         self.openfunc(self.request, success = self._check_success, error = self._check_error)
  51.  
  52.     
  53.     def preprocess_response(self, resp):
  54.         closing(resp).__enter__()
  55.         
  56.         try:
  57.             data = resp.read()
  58.         finally:
  59.             pass
  60.  
  61.         sio = StringIO.StringIO(data)
  62.         for attr in ('read', 'seek', 'close', 'tell'):
  63.             setattr(resp, attr, getattr(sio, attr))
  64.         
  65.         resp._stringio = sio
  66.         resp.content = data
  67.         return resp
  68.  
  69.     
  70.     def _check_success(self, resp):
  71.         resp = self.preprocess_response(resp)
  72.         
  73.         try:
  74.             self.redirect(resp)
  75.         except Exception:
  76.             e = None
  77.             error = self.check_resp_for_errors(resp)
  78.             if error is None:
  79.                 self.finish('success', resp)
  80.             else:
  81.                 self._on_error(error)
  82.         except:
  83.             error is None
  84.  
  85.  
  86.     
  87.     def _redirect_success(self, resp):
  88.         self._sub_requester = None
  89.         self.finish('success', resp)
  90.  
  91.     
  92.     def _redirect_error(self, err = None):
  93.         self._sub_requester = None
  94.         self._on_error(err)
  95.  
  96.     
  97.     def redirect(self, resp):
  98.         if self._sub_requester is not None:
  99.             raise Exception('Redirect already in progress')
  100.         
  101.         redirect = self.make_redirect_request(resp)
  102.         new = self._sub_requester = type(self)(self.openfunc, redirect)
  103.         new.open(success = self._redirect_success, error = self._redirect_error)
  104.  
  105.     
  106.     def make_redirect_request(self, resp):
  107.         for redirecter in (self._find_http_redirect, self._find_js_redirect):
  108.             redirect = redirecter(resp)
  109.             if redirect is not None:
  110.                 if not redirect.startswith('http'):
  111.                     if not redirect.startswith('/'):
  112.                         redirect = '/' + redirect
  113.                     
  114.                     redirect = self.request.get_type() + '://' + self.request.get_host() + redirect
  115.                 
  116.                 log.debug('got redirect: %r', redirect)
  117.                 return redirect
  118.                 continue
  119.         
  120.         raise Exception("Couldn't find URL for redirect in %r" % resp.content)
  121.  
  122.     
  123.     def _find_http_redirect(self, resp):
  124.         if resp.code in (301, 302):
  125.             return resp.headers.get('Location', None)
  126.         
  127.  
  128.     
  129.     def _find_js_redirect(self, resp):
  130.         for redirect_re, url_group_id in self.js_redirect_res:
  131.             match = redirect_re.search(resp.content)
  132.             if match:
  133.                 new_url = match.group(url_group_id)
  134.                 if new_url:
  135.                     return new_url
  136.                 
  137.             new_url
  138.         
  139.  
  140.     
  141.     def check_resp_for_errors(self, resp):
  142.         pass
  143.  
  144.     
  145.     def _check_error(self, err = None):
  146.         self._on_error(err)
  147.  
  148.     
  149.     def _on_error(self, e = None):
  150.         self.retries -= 1
  151.         if self.retries:
  152.             if self.pause_for_attempts > 0:
  153.                 Timer(self.pause_for_attempts, self._attempt_open).start()
  154.             else:
  155.                 self._attempt_open()
  156.         else:
  157.             self.finish('error', e)
  158.  
  159.     
  160.     def finish(self, result, *args):
  161.         cb = self.callback
  162.         self.callback = None
  163.         self._sub_request = None
  164.         getattr(cb, result, (lambda : pass))(*args)
  165.  
  166.  
  167.  
  168. def dispatcher(what, arg_getter):
  169.     
  170.     def dispatch(self, *args):
  171.         name = arg_getter(args)
  172.         handler = getattr(self, '%s_%s' % (what, name), getattr(self, '%s_default' % what, None))
  173.         if handler is not None:
  174.             return handler(*args)
  175.         else:
  176.             log.error('No default handler for %r', what)
  177.  
  178.     return dispatch
  179.  
  180.  
  181. class WebScraper(object):
  182.     CookieJarFactory = cookielib.CookieJar
  183.     HttpOpenerFactory = staticmethod(build_opener)
  184.     RequestFactory = staticmethod(urllib2.Request.make_request)
  185.     domain = None
  186.     urls = { }
  187.     
  188.     def __init__(self):
  189.         self._waiting = set()
  190.         self._callbacks = { }
  191.         self.init_http()
  192.  
  193.     
  194.     def init_http(self):
  195.         self._jar = self.CookieJarFactory()
  196.         self.http = self.HttpOpenerFactory(urllib2.HTTPCookieProcessor(self._jar))
  197.  
  198.     
  199.     def get_cookie(self, key, default = sentinel, domain = None, path = '/'):
  200.         if domain is None:
  201.             domain = self.domain
  202.         
  203.         val = default
  204.         
  205.         try:
  206.             self._jar._cookies_lock.__enter__()
  207.             
  208.             try:
  209.                 val = self._jar._cookies[domain][path][key].value
  210.             finally:
  211.                 pass
  212.  
  213.         except (AttributeError, KeyError):
  214.             e = None
  215.             if val is sentinel:
  216.                 raise e
  217.             else:
  218.                 return val
  219.         except:
  220.             val is sentinel
  221.  
  222.         return val
  223.  
  224.     
  225.     def set_cookie(self, key, value, domain = None, path = '/'):
  226.         if domain is None:
  227.             domain = self.domain
  228.         
  229.         self._jar._cookies_lock.__enter__()
  230.         
  231.         try:
  232.             domain_dict = self._jar._cookies.setdefault(domain, { })
  233.             path_dict = domain_dict.setdefault(path, { })
  234.             cookie = path_dict.get(key, None)
  235.             if cookie is None:
  236.                 cookie = build_cookie(key, value, domain = domain, path = path)
  237.                 path_dict[key] = cookie
  238.             else:
  239.                 cookie.value = value
  240.         finally:
  241.             pass
  242.  
  243.  
  244.     
  245.     def set_waiting(self, *things):
  246.         self._waiting.update(things)
  247.  
  248.     
  249.     def clear_waiting(self, *things):
  250.         self._waiting -= set(things)
  251.         if not self._waiting:
  252.             self.done_waiting()
  253.         
  254.  
  255.     
  256.     def done_waiting(self):
  257.         pass
  258.  
  259.     
  260.     def request(self, name, callback = None):
  261.         if name in self._waiting:
  262.             log.warning('already waiting for %r', name)
  263.             return None
  264.         
  265.         self._callbacks[name] = callback
  266.         req = self.build_request(name)
  267.         self.perform_request(name, req)
  268.  
  269.     request = callsback(request)
  270.     
  271.     def perform_request(self, name, req):
  272.         self.set_waiting(name)
  273.         if req is None:
  274.             return self.error_handler(name)(Exception('No request created for %r' % name))
  275.         
  276.         reqopen = RequestOpener(threaded(self.http.open), req)
  277.         reqopen.open(success = self.success_handler(name), error = self.error_handler(name))
  278.  
  279.     
  280.     def error_handler(self, name):
  281.         
  282.         def handler(e = (None, None)):
  283.             self.clear_waiting(name)
  284.             cb = self._callbacks.pop(name, None)
  285.             retval = self.handle_error(name, e)
  286.             if cb is not None:
  287.                 cb.error(e)
  288.             
  289.             return retval
  290.  
  291.         return handler
  292.  
  293.     
  294.     def success_handler(self, name):
  295.         
  296.         def handler(resp):
  297.             self.clear_waiting(name)
  298.             resp = self.preprocess_resp(name, resp)
  299.             newresp = self.handle_success(name, resp)
  300.             if newresp is not None:
  301.                 resp = newresp
  302.             
  303.             cb = self._callbacks.pop(name, None)
  304.             if cb is not None:
  305.                 cb.success(resp)
  306.             
  307.             return newresp
  308.  
  309.         return handler
  310.  
  311.     build_request = dispatcher('build_request', itemgetter0)
  312.     handle_error = dispatcher('handle_error', itemgetter0)
  313.     preprocess_resp = dispatcher('preprocess_resp', itemgetter0)
  314.     handle_success = dispatcher('handle_success', itemgetter0)
  315.     
  316.     def build_request_default(self, name):
  317.         link = self.urls[name]
  318.         if callable(link):
  319.             link = link()
  320.         
  321.         return self.RequestFactory(link)
  322.  
  323.     
  324.     def handle_error_default(self, name, e):
  325.         log.error('Error requesting %r: %r', name, e)
  326.  
  327.     
  328.     def handle_success_default(self, name, resp):
  329.         if resp.document is not None:
  330.             print HTML.tostring(resp.document, pretty_print = True)
  331.         else:
  332.             print 'Got None for lxml doc. code/status= %r' % ((resp.code, resp.msg, str(resp.headers)),)
  333.  
  334.     
  335.     def preprocess_resp_default(self, name, resp):
  336.         data = resp.content
  337.         if data:
  338.             document = HTML.fromstring(data, base_url = resp.geturl())
  339.             document.make_links_absolute()
  340.             resp.document = document
  341.         else:
  342.             resp.document = None
  343.         return resp
  344.  
  345.  
  346. if __name__ == '__main__':
  347.     pass  # running this module as a script does nothing beyond the definitions above
  348.  
  349.